## [1] "MMSD-P2"  "MMSD-P7"  "MMSD-P8"  "MMSD-P11" "MMSD-P18" "Madison"

1 Data: The first look

The two data sets used in this analysis are the Madison case data sourced from the Wisconsin DHS and wastewater concentration data produced by the Wisconsin State Laboratory of Hygiene. This wastewater data has entries every couple of days from 25 January 2021 to 04 January 2022.

Date Site FirstConfirmed SevenDayMACases N1 BCoV N2 PMMoV GeoMeanN12 FlowRate temperature equiv_sewage_amt
2021-01-25 MMSD-P11 8 10.571429 280459 8.99 354015 31434596 315097.91 9.31 NA 1
2021-01-25 MMSD-P18 17 8.571429 310278 10.37 447153 29445680 372480.52 11.82 NA 1
2021-01-25 MMSD-P2 13 9.571429 62166 11.38 94675 35099462 76717.44 5.74 NA 1
2021-01-25 MMSD-P7 5 10.000000 64345 10.95 107125 22900169 83023.84 4.35 NA 1
2021-01-25 MMSD-P8 4 12.000000 66705 11.63 110510 22716404 85857.85 6.06 NA 1
2021-01-28 MMSD-P11 19 24.857143 1107390 1.98 1098130 24862163 1102750.28 9.07 NA 1

The case data has a strong weekend effect so for this section we look at a seven day smoothing of cases. The simple display of the data shows the core components of this story. First, wastewater data is noisy. And that there is a clear relationship between the two signals.

FirstImpresionFunc <- function(DF){
  FirstImpressionDF <- DF%>%
    NoNa(SelectedIndVar,SelectedDepVar)#Removing NA
  FirstImpressionGGplot = FirstImpressionDF%>%
  ggplot(aes(x=Date))+#Data depends on time
  geom_point(aes(y=!!sym(SelectedDepVar), color=SelectedDepVar,info=!!sym(SelectedDepVar)),size = 1)+
  geom_line(aes(y=MinMaxFixing(!!sym(SelectedIndVar),!!sym(SelectedDepVar)), 
                color=SelectedIndVar,
                info=!!sym(SelectedIndVar)))+#compares SelectedIndVar to Cases
  geom_line(aes(y=(SevenDayMACases), 
                color="Seven Day MA Cases",
                info=SevenDayMACases))+
  labs(y="Reported cases")+
    facet_wrap(~Site)
  return(ggplotly(FirstImpressionGGplot))
}
FirstImpresionFunc(filter(FullDF,Site=="Madison"))
FirstImpresionFunc(filter(FullDF,Site=="MMSD-P2"))
FirstImpresionFunc(filter(FullDF,Site=="MMSD-P7"))
FirstImpresionFunc(filter(FullDF,Site=="MMSD-P8"))
FirstImpresionFunc(filter(FullDF,Site=="MMSD-P11"))
FirstImpresionFunc(filter(FullDF,Site=="MMSD-P18"))

2 Removing potential outliers

Looking at the wastewater measurements we observe there were some points many times larger than adjacent values hinting at them being outliers. We used the adjacent 10 values on each side and marked points 2.5 standard deviations away from the group mean as outliers.

OutlierRemoveFunc <- function(DF){
  ErrorMarkedDF <- DF%>%#
    mutate(FlagedOutliers = IdentifyOutliers(!!sym(SelectedIndVar),Bin = 21, Action = "Flag"),
           #Manual flagging that method misses due to boundary effect with binning
           FlagedOutliers = ifelse(Date == mdy("01/26/2021") | Date == mdy("01/27/2021"),
                                   TRUE, FlagedOutliers),
           NoOutlierVar = ifelse(FlagedOutliers, NA, !!sym(SelectedIndVar)))
  return(ErrorMarkedDF)
}
MadDF <- OutlierRemoveFunc(filter(FullDF,Site=="Madison"))
P2DF <- OutlierRemoveFunc(filter(FullDF,Site=="MMSD-P2"))
P7DF <- OutlierRemoveFunc(filter(FullDF,Site=="MMSD-P7"))
P8DF <- OutlierRemoveFunc(filter(FullDF,Site=="MMSD-P8"))
P11DF <- OutlierRemoveFunc(filter(FullDF,Site=="MMSD-P11"))
P18DF <- OutlierRemoveFunc(filter(FullDF,Site=="MMSD-P18"))

OutlierPlotFunc <- function(DF){
  #Split N1 into outlier and non outlier for next ggplot
OutLierPlotDF <- DF%>%
  mutate(!!OutlierName := ifelse(FlagedOutliers,!!sym(SelectedIndVar),NA))%>%
           mutate(!!SelectedIndVar := NoOutlierVar)

OutLierPlotObject <- OutLierPlotDF%>%
  filter(!(is.na(!!sym(SelectedIndVar))&is.na(!!sym(OutlierName))))%>%
  ggplot(aes(x=Date))+#Data depends on time 
  geom_line(aes(y=!!sym(SelectedIndVar),
                color=SelectedIndVar, 
                info = !!sym(SelectedIndVar)))+#compares Var to Cases
  geom_point(aes(y=!!sym(OutlierName),
                 color=OutlierName,
                 info = !!sym(OutlierName)))+
  ColorRule+
    facet_wrap(~Site)

#mentioned hand picked list other choices
ReturnPlot <- ggplotly(OutLierPlotObject,tooltip=c("info","Date"))%>%
    layout(yaxis = SecondAxisFormat,
           legend=list(title=list(text=''),x = 1.15, y = 0.9),
           margin = list(l = 50, r = 75, b = 50, t = 50, pad = 4))
return(ReturnPlot)
}
OutlierPlotFunc(MadDF)
OutlierPlotFunc(P2DF)
OutlierPlotFunc(P7DF)
OutlierPlotFunc(P8DF)
OutlierPlotFunc(P11DF)
OutlierPlotFunc(P18DF)
PrepOutlierFunc <- function(DF){
  #Drop Var create Var filter 
UpdatedDF <- DF%>%
  select(-sym(SelectedIndVar))%>%
  rename(!!sym(SelectedIndVar) := NoOutlierVar)
return(UpdatedDF)
}

MadDF <- PrepOutlierFunc(MadDF)
P2DF <- PrepOutlierFunc(P2DF)
P7DF <- PrepOutlierFunc(P7DF)
P8DF <- PrepOutlierFunc(P8DF)
P11DF <- PrepOutlierFunc(P11DF)
P18DF <- PrepOutlierFunc(P18DF)
LoessPlotFunc <- function(DF,SpanConstant = .163){
  MainDF <- DF
  MainDF[[loessVar]] <- loessFit(y=(MainDF[[SelectedIndVar]]), 
                      x=MainDF$Date, #create loess fit of the data
                      span=SpanConstant, #span of .2 seems to give the best result, not rigorously chosen
                      iterations=2)$fitted#2 iterations remove some bad patterns
  
  MainDF <- MainDF%>%
    NoNa(loessVar,"SevenDayMACases")
  SLDLoessGraphic <- MainDF%>%
    ggplot(aes(x=Date))+
    geom_line(aes(y=!!sym(SelectedDepVar), color=SelectedDepVar , info = !!sym(SelectedDepVar)),alpha=.1)+
    geom_line(aes(y=MinMaxFixing(!!sym(SelectedIndVar),!!sym(SelectedDepVar)),
                color=SelectedIndVar,
                info = !!sym(SelectedIndVar)),
            alpha=.2)+
    geom_line(aes(y=SevenDayMACases,
                color="Seven Day MA Cases" , 
                info = SevenDayMACases))+
    geom_line(aes(y=MinMaxFixing(!!sym(loessVar),!!sym(SelectedDepVar),!!sym(SelectedIndVar)), 
                color=loessVar ,
                info = !!sym(loessVar)))+
    facet_wrap(~Site)+
    labs(y="Reported cases")


ReturnPlot <- ggplotly(SLDLoessGraphic,tooltip=c("info","Date"))%>%
    add_lines(x=~Date, y=MainDF[[SelectedIndVar]],
            yaxis="y2", data=MainDF, showlegend=FALSE, inherit=FALSE) %>%
    layout(yaxis2 = SecondAxisFormat,
           legend=list(title=list(text=''),x = 1.15, y = 0.9),
           margin = list(l = 50, r = 75, b = 50, t = 50, pad = 4))
return(ReturnPlot)
}


LoessPlotFunc(MadDF)
LoessPlotFunc(P2DF)
LoessPlotFunc(P7DF)
LoessPlotFunc(P8DF)
LoessPlotFunc(P11DF)
LoessPlotFunc(P18DF)